McKinsey Analytics Online Hackathon

Imports


In [163]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import matplotlib.pyplot as plt
%matplotlib inline
# Notebook-wide plotting defaults: large figures, then the ggplot look.
plt.rcParams.update({'figure.figsize': (20, 10)})
plt.style.use('ggplot')

Data Loading and Handling


In [172]:
# Load the training data, using the parsed 'DateTime' column as the index.
# The frame must be bound to `df_train`, the name every line below reads.
df_train = pd.read_csv('train_aWnotuB.csv', index_col='DateTime', parse_dates=True)

# One frame per junction. Boolean filtering followed by a non-inplace drop
# yields independent frames (no SettingWithCopyWarning), and the keyword
# `axis=1` works on both old and current pandas.
j1_train = df_train[df_train['Junction'] == 1].drop(['Junction', 'ID'], axis=1)
j2_train = df_train[df_train['Junction'] == 2].drop(['Junction', 'ID'], axis=1)
j3_train = df_train[df_train['Junction'] == 3].drop(['Junction', 'ID'], axis=1)
j4_train = df_train[df_train['Junction'] == 4].drop(['Junction', 'ID'], axis=1)

Junction 1


In [173]:
# Build the Prophet input for junction 1: timestamp column 'ds' and
# log-transformed target 'y'.
df = (
    j1_train
    .reset_index()
    .rename(columns={'DateTime': 'ds', 'Vehicles': 'y'})
    .assign(y=lambda frame: np.log(frame['y']))
)

In [174]:
# Junction 1 model: yearly seasonality forced on, plus a custom
# 30.5-day "monthly" seasonal component.
model = Prophet(yearly_seasonality=True)
model.add_seasonality(fourier_order=5, period=30.5, name='monthly')
model.fit(df=df);

In [175]:
# Extend the training timeline by 2952 hourly periods and predict over
# the whole (history + future) range.
future = model.make_future_dataframe(freq='H', periods=2952)
forecast = model.predict(df=future)

In [176]:
# Index both frames by timestamp so the forecast can be joined onto the
# junction 1 observations.
df = df.set_index('ds')
forecast = forecast.set_index('ds')

# Outer join keeps every forecast timestamp, including those with no
# observed 'Vehicles' value. The forecast is in log space (the target was
# log-transformed before fitting), so exponentiate it back to vehicle counts.
# The original read `viz_df['yhat']`, a name that is never defined.
viz_df_j1 = (
    j1_train
    .join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
    .assign(yhat_rescaled=lambda frame: np.exp(frame['yhat']))
)

In [181]:
# Keep the last 2952 rows (the forecast horizon requested from Prophet) and
# expose the back-transformed prediction as 'Vehicles'.
# `.assign` / `.drop` return a new frame, so nothing is written into a slice
# of viz_df_j1 (no SettingWithCopyWarning), and the keyword `axis=1` replaces
# the positional axis argument that current pandas rejects.
junction1 = (
    viz_df_j1.iloc[-2952:]
    .assign(Vehicles=lambda frame: frame['yhat_rescaled'])
    .drop(['yhat', 'yhat_lower', 'yhat_upper', 'yhat_rescaled'], axis=1)
)

Junction 2


In [183]:
# Build the Prophet input for junction 2: timestamp column 'ds' and
# log-transformed target 'y'.
df = (
    j2_train
    .reset_index()
    .rename(columns={'DateTime': 'ds', 'Vehicles': 'y'})
    .assign(y=lambda frame: np.log(frame['y']))
)

In [184]:
# Junction 2 model: yearly seasonality forced on, plus a custom
# 30.5-day "monthly" seasonal component.
model = Prophet(yearly_seasonality=True)
model.add_seasonality(fourier_order=5, period=30.5, name='monthly')
model.fit(df=df);

In [185]:
# Extend the training timeline by 2952 hourly periods and predict over
# the whole (history + future) range.
future = model.make_future_dataframe(freq='H', periods=2952)
forecast = model.predict(df=future)

In [186]:
# Index both frames by timestamp so the forecast can be joined onto the
# junction 2 observations.
df = df.set_index('ds')
forecast = forecast.set_index('ds')

# Outer join keeps every forecast timestamp; 'yhat_rescaled' undoes the
# log transform applied to the target before fitting.
viz_df_j2 = (
    j2_train
    .join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
    .assign(yhat_rescaled=lambda frame: np.exp(frame['yhat']))
)

In [189]:
# Keep the last 2952 rows (the forecast horizon requested from Prophet) and
# expose the back-transformed prediction as 'Vehicles'.
# `.assign` / `.drop` return a new frame, so nothing is written into a slice
# of viz_df_j2 (no SettingWithCopyWarning), and the keyword `axis=1` replaces
# the positional axis argument that current pandas rejects.
junction2 = (
    viz_df_j2.iloc[-2952:]
    .assign(Vehicles=lambda frame: frame['yhat_rescaled'])
    .drop(['yhat', 'yhat_lower', 'yhat_upper', 'yhat_rescaled'], axis=1)
)

Junction 3


In [265]:
# Build the Prophet input for junction 3: timestamp 'ds', log-transformed
# target 'y', and a constant 'cap' of 6 for the logistic-growth model
# (the cap is on the same log scale as 'y').
df = (
    j3_train
    .reset_index()
    .rename(columns={'DateTime': 'ds', 'Vehicles': 'y'})
    .assign(y=lambda frame: np.log(frame['y']))
    .assign(cap=6)
)

In [266]:
# Junction 3 model: logistic (saturating) growth plus a custom
# 30.5-day "monthly" seasonal component.
model = Prophet(growth='logistic')
model.add_seasonality(fourier_order=5, period=30.5, name='monthly')
model.fit(df=df);


INFO:fbprophet.forecaster:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.

In [267]:
# Extend the timeline by 2952 hourly periods; the future frame carries the
# same cap (6) that was set on the training frame.
future = model.make_future_dataframe(freq='H', periods=2952).assign(cap=6)
forecast = model.predict(df=future)

In [268]:
# Index both frames by timestamp so the forecast can be joined onto the
# junction 3 observations.
df = df.set_index('ds')
forecast = forecast.set_index('ds')

# Outer join keeps every forecast timestamp; 'yhat_rescaled' undoes the
# log transform applied to the target before fitting.
viz_df_j3 = (
    j3_train
    .join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
    .assign(yhat_rescaled=lambda frame: np.exp(frame['yhat']))
)

In [269]:
# Keep the last 2952 rows (the forecast horizon requested from Prophet) and
# expose the back-transformed prediction as 'Vehicles'.
# `.assign` / `.drop` return a new frame, so nothing is written into a slice
# of viz_df_j3 (no SettingWithCopyWarning), and the keyword `axis=1` replaces
# the positional axis argument that current pandas rejects.
junction3 = (
    viz_df_j3.iloc[-2952:]
    .assign(Vehicles=lambda frame: frame['yhat_rescaled'])
    .drop(['yhat', 'yhat_lower', 'yhat_upper', 'yhat_rescaled'], axis=1)
)

Junction 4


In [257]:
# Build the Prophet input for junction 4: timestamp 'ds', log-transformed
# target 'y', and a constant 'cap' of 4 for the logistic-growth model
# (the cap is on the same log scale as 'y').
df = (
    j4_train
    .reset_index()
    .rename(columns={'DateTime': 'ds', 'Vehicles': 'y'})
    .assign(y=lambda frame: np.log(frame['y']))
    .assign(cap=4)
)

In [258]:
# Junction 4 model: logistic (saturating) growth plus a custom
# 30.5-day "monthly" seasonal component.
model = Prophet(growth='logistic')
model.add_seasonality(fourier_order=5, period=30.5, name='monthly')
model.fit(df=df);


INFO:fbprophet.forecaster:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.

In [259]:
# Extend the timeline by 2952 hourly periods; the future frame carries the
# same cap (4) that was set on the training frame.
future = model.make_future_dataframe(freq='H', periods=2952).assign(cap=4)
forecast = model.predict(df=future)

In [260]:
# Index both frames by timestamp so the forecast can be joined onto the
# junction 4 observations.
df = df.set_index('ds')
forecast = forecast.set_index('ds')

# Outer join keeps every forecast timestamp; 'yhat_rescaled' undoes the
# log transform applied to the target before fitting.
viz_df_j4 = (
    j4_train
    .join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
    .assign(yhat_rescaled=lambda frame: np.exp(frame['yhat']))
)

In [261]:
# Keep the last 2952 rows (the forecast horizon requested from Prophet) and
# expose the back-transformed prediction as 'Vehicles'.
# `.assign` / `.drop` return a new frame, so nothing is written into a slice
# of viz_df_j4 (no SettingWithCopyWarning), and the keyword `axis=1` replaces
# the positional axis argument that current pandas rejects.
junction4 = (
    viz_df_j4.iloc[-2952:]
    .assign(Vehicles=lambda frame: frame['yhat_rescaled'])
    .drop(['yhat', 'yhat_lower', 'yhat_upper', 'yhat_rescaled'], axis=1)
)

Solution File


In [272]:
# Tag each forecast frame with its junction number.
# `.assign` returns a fresh frame instead of writing a column into a frame
# that was produced by slicing, which avoids SettingWithCopyWarning.
junction1 = junction1.assign(Junction=1)
junction2 = junction2.assign(Junction=2)
junction3 = junction3.assign(Junction=3)
junction4 = junction4.assign(Junction=4)

In [274]:
# Copy the timestamp index into a 'DateTime' column, keep the three output
# columns in order, and replace the index with a plain 0..n-1 range.
# Written as one non-mutating chain so no column is assigned on a sliced
# frame and nothing is modified in place.
junction1 = (
    junction1
    .assign(DateTime=junction1.index)
    [['DateTime', 'Junction', 'Vehicles']]
    .reset_index(drop=True)
)

In [283]:
# Copy the timestamp index into a 'DateTime' column, keep the three output
# columns in order, and replace the index with a plain 0..n-1 range.
# Written as one non-mutating chain so no column is assigned on a sliced
# frame and nothing is modified in place.
junction2 = (
    junction2
    .assign(DateTime=junction2.index)
    [['DateTime', 'Junction', 'Vehicles']]
    .reset_index(drop=True)
)

In [284]:
# Copy the timestamp index into a 'DateTime' column, keep the three output
# columns in order, and replace the index with a plain 0..n-1 range.
# Written as one non-mutating chain so no column is assigned on a sliced
# frame and nothing is modified in place.
junction3 = (
    junction3
    .assign(DateTime=junction3.index)
    [['DateTime', 'Junction', 'Vehicles']]
    .reset_index(drop=True)
)

In [285]:
# Copy the timestamp index into a 'DateTime' column, keep the three output
# columns in order, and replace the index with a plain 0..n-1 range.
# Written as one non-mutating chain so no column is assigned on a sliced
# frame and nothing is modified in place.
junction4 = (
    junction4
    .assign(DateTime=junction4.index)
    [['DateTime', 'Junction', 'Vehicles']]
    .reset_index(drop=True)
)

In [287]:
# Stack the four junction forecasts in junction order with a fresh 0..n-1
# index. `pd.concat` replaces the chained `DataFrame.append` calls, which
# no longer exist in pandas 2.x, and builds the result in a single pass.
final_forecast = pd.concat(
    [junction1, junction2, junction3, junction4],
    ignore_index=True,
)

In [290]:
# Read the test CSV; a later cell attaches the forecast to it as a
# 'Vehicles' column before it is written back out.
test=pd.read_csv('test_BdBKkAj.csv')

In [292]:
# Fail loudly if the forecast and the test file disagree on row count;
# a plain index-aligned assignment would silently leave NaN instead.
if len(final_forecast) != len(test):
    raise ValueError(
        'forecast has %d rows but the test file has %d'
        % (len(final_forecast), len(test))
    )

# Copy predictions across by position.
# NOTE(review): this assumes the test file lists junctions 1-4 in order,
# each in chronological order, matching final_forecast - confirm.
test['Vehicles'] = final_forecast['Vehicles'].values

In [293]:
# Write the test frame, now carrying the 'Vehicles' predictions, to disk.
# NOTE(review): to_csv writes the row index as an extra unnamed first column
# by default - confirm the submission format expects it, otherwise pass
# index=False.
test.to_csv('final_forecast.csv')

In [ ]: